library(dplyr)
library(lubridate)
library(zoo)
library(ggplot2)
library(limma)
library(ggpubr)
library(grid)
library(plotly)

# Data files and prep work ----
# Load the project helper scripts one after another, in the listed order.
invisible(lapply(
  c(
    "../../lib/DataProccess.R",
    "../../lib/NormFuncs.R",
    "../../lib/OutlierDetectionFuncs.R",
    "../../lib/DataPathName.R"
  ),
  source
))

# Root of the directory where the data is stored, taken from the `params`
# object (not defined in this file).
BaseDir <- params$BaseDir

# Files used:
#
# COVID-19_WastewaterAnalysis/data/processed/LIMSWasteData_02-09-22.csv

# Score every sample by how sharply it jumps relative to the neighbouring
# samples of the same site.
#
# Within each Site, with rows ordered by Date:
#   * missing N1 / N2 values are replaced by 0;
#   * *RankLeft ranks the change from the previous sample and *RankRight the
#     change versus the next sample, with rank 1 being the largest increase;
#   * MaxN1Rank is the worst (largest) of the four ranks, negated, so values
#     closer to zero mean the sample stands out on both sides in both N1 and N2.
#
# The first and last sample of a site have no neighbour on one side, so lag()
# / lead() give NA there; rank() keeps those NAs and pmax() propagates them,
# leaving MaxN1Rank and Flag as NA for those rows.
#
# The result stays grouped by Site.
RankingDF <- LIMSFullDF %>%
  #filter(Date<mdy("10/31/2021"))%>%
  group_by(Site) %>%
  arrange(Date) %>%
  mutate(N1 = ifelse(!is.na(N1), N1, 0),
         N2 = ifelse(!is.na(N2), N2, 0)) %>%
  mutate(N1RankLeft = rank(desc(N1 - lag(N1))),
         N1RankRight = rank(desc(N1 - lead(N1))),
         N2RankLeft = rank(desc(N2 - lag(N2))),
         N2RankRight = rank(desc(N2 - lead(N2)))) %>%
  select(Date, Site, N1RankLeft, N1RankRight, N2RankLeft, N2RankRight, N1, N2) %>%
  mutate(MaxN1Rank = -pmax(N1RankLeft, N1RankRight, N2RankLeft, N2RankRight),
         # MaxN1Rank is a negated rank, so it is always negative: a test
         # against +25 is true for every row. Flag only the samples whose four
         # ranks are all better than 25th, i.e. MaxN1Rank above -25.
         Flag = ifelse(MaxN1Rank > -25, "Flagged", "NotFlagged"))


# Cut-offs passed to PlotlyView as its Threshold argument
# (presumably quantile levels, given QuintileBound -- confirm in the helper).
BreakUsed <- c(.95, .98)

# PlotlyView and QuintileBound are not defined in this file; presumably they
# come from the helper scripts sourced at the top -- TODO confirm.
Vec <- PlotlyView(RankingDF, "MaxN1Rank", QuintileBound, Threshold = BreakUsed)

# Show the first two elements of the result
# (presumably the interactive plots -- confirm against PlotlyView).
Vec[[1]]
Vec[[2]]

# Third element: a table with Date, Site, FlagScheme and MaxN1Rank columns.
# Turn it into a listing ordered by error level and then by rank.
Vec[[3]] %>%
  # Kept although the table is re-sorted at the end: the row order here decides
  # how ties.method = "first" breaks ties below.
  arrange(FlagScheme) %>%
  select(Date, Site, FlagScheme, MaxN1Rank) %>%
  rename(ErrorLevel = FlagScheme, Rank = `MaxN1Rank`) %>%
  ungroup() %>%
  # Reverse the ascending rank so the largest MaxN1Rank gets Rank 1. The offset
  # is derived from the number of ranked rows instead of a hard-coded 31, which
  # only gave 1..n when exactly 30 rows were present.
  mutate(Rank = sum(!is.na(Rank)) + 1 - rank(Rank, ties.method = "first"),
         # Shift the integer codes down by one so the lowest level becomes 0
         # (assumes FlagScheme is a factor -- TODO confirm).
         ErrorLevel = as.integer(ErrorLevel) - 1) %>%
  arrange(ErrorLevel, Rank)#%>%
  #write.csv("RmdOutput/PossibleOutliers.csv")